from pyflink.table import TableEnvironment, EnvironmentSettings

# Build the Table API entry point in batch execution mode; every SQL
# statement below is executed through this environment.
batch_settings = EnvironmentSettings.in_batch_mode()
table_env = TableEnvironment.create(batch_settings)


# Register the source table `students`, read as comma-delimited CSV from
# HDFS through the filesystem connector.
# Flink SQL requires every WITH option to be written as 'key' = 'value';
# a ':' separator (as in JSON/dict syntax) is a SQL parse error, so the
# 'connector' option must use '=' like the other options.
table_env.execute_sql('''
    CREATE TABLE students(
    id string,
    name string,
    age int,
    sex string,
    clazz string
    )with(
    'connector' = 'filesystem',
    'path' = 'hdfs://master:9000/data/students',
    'format' = 'csv',
    'csv.field-delimiter' = ','
    )
''')

# DDL for the table `clazz_num` (columns: clazz, num), stored as
# comma-delimited CSV on HDFS through the filesystem connector.
# The trailing `--` notes inside the statement are SQL comments marking
# the connector, path and format options as required and naming the
# field delimiter option.
clazz_num_ddl = '''
    create table clazz_num(
        clazz string,
        num bigint
        ) with(
            'connector' = 'filesystem',           -- 必选：指定连接器类型
            'path' = 'hdfs://master:9000/data/clazz_num',  -- 必选：指定路径
            'format' = 'csv',                     -- 必选：文件系统连接器指定 format
            'csv.field-delimiter' = ',' -- 字段分隔符
        )
'''
table_env.execute_sql(clazz_num_ddl)

# Count the rows of `students` per `clazz` value and write one
# (clazz, num) row per group into `clazz_num`.
# NOTE(review): execute_sql() returns a TableResult without blocking on the
# INSERT job. If this script is run in a local mini cluster, calling
# .wait() on the result may be needed so the process does not exit before
# the job finishes -- confirm against the actual deployment mode.
clazz_count_insert = '''
    insert into clazz_num
    select clazz,count(1) as num
    from
    students
    group by
    clazz
'''
table_env.execute_sql(clazz_count_insert)